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subroutine trid (a, k ,n, diag, sdiag) 

! a JC*»3fe^?iJOTH^§S^-*^-f-5 0 daig, sdiag KHSfcf^J^**^ B>J 

constant iblk <- 'set block width'
shared array a(k, n), diag(n), sdiag(n)
allocate shared array u(n+1, iblk), v(n+1, iblk)
• U«yp-y^3^t«rfi l 5^ny i ?*»jjrt, v II w 

c create threads 

create threads 

set nothrd and numthrd 
c nothrd te^* K^fc CO#"^1? 1 — # TH, numthrd=#TH (*Uy K0>&&) 

nb=(n-2+iblk-1)/iblk

nbase=0

do i=1, nb-1

nbase=(i-1)*iblk

istart=1

nwidth=iblk

call copy(a, k, n, nbase, nothrd, numthrd)
c copy

u(nbase+1:n, 1:iblk) <- a(nbase+1:n, nbase+1:nbase+iblk)
call blktrid(a, k, n, diag, sdiag, nbase, istart, nwidth,
u, v, nothrd, numthrd) ! [comment illegible in source]

c copy back

a(nbase+1:n, nbase+1:nbase+iblk) <- u(nbase+1:n, 1:iblk)
call update(a, k, n, nbase, nwidth, u, v, nothrd, numthrd)
enddo 

nbase=(nb-1)*iblk
istart=1
nwidth=n-nbase

call blktrid(a, k, n, diag, sdiag, nbase, istart, nwidth,
u, v, nothrd, numthrd)

return 
end 
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c nbaaeltyP^^^ttg&/Trft7t5/ |>, istart tt?? »B?l>'W UTStf* ft* 

subroutine blktrid(a, k, n, diag, sdiag, nbase, istart, nwidth,
u, v, nothrd, numthrd)
shared array a(k, n), diag(n), sdiag(n), u(n+1,*), v(n+1,*)

if (nwidth<10) then

call btunit(a, k, n, diag, sdiag, nbase, istart, nwidth,
u, v, nothrd, numthrd)

else 

istart2 <- istart
nwidth2 <- nwidth/2

call blktrid(a, k, n, diag, sdiag, nbase, istart2, nwidth2,
u, v, nothrd, numthrd)

BARRIER SYNC 

istart3 <- istart+nwidth/2

nwidth3 <- nwidth-nwidth/2

is2 <- istart2

ie2 <- istart+nwidth2-1

is3 <- istart3

ie3 <- istart3+nwidth3-1

iptr <- nbase+istart3

len <- (n-iptr+numthrd-1)/numthrd

is <- iptr+(nothrd-1)*len+1

ie <- min(n, iptr+nothrd*len)

u(is:ie, is3:ie3) <- u(is:ie, is3:ie3)

-u(is:ie, is2:ie2)*w(is3:ie3, is2:ie2)^t
-w(is:ie, is2:ie2)*u(is3:ie3, is2:ie2)^t

BARRIER SYNC 

call blktrid(a, k, n, diag, sdiag, nbase, istart3, nwidth3,
u, v, nothrd, numthrd)

endif 

return 

end 
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c blktrid C9I*|£8/U— ^ V 

subroutine btunit(a, k, n, diag, sdiag, nbase, istart, nwidth,
u, v, nothrd, numthrd)
shared :: a(k, n), diag(n), sdiag(n), u(n+1,*), v(n+1,*)
shared :: tmp(numthrd), sigma, alpha
if (nbase+istart>n-2) then 
return 
endif 

do i=istart, istart-1+nwidth
iptr2 <- nbase+i

len <- (n-iptr2+numthrd-1)/numthrd
is <- iptr2+(nothrd-1)*len+1
ie <- min(n, iptr2+nothrd*len)
BARRIER SYNC

tmp(nothrd) <- u(is:ie, i)^t*u(is:ie, i)

BARRIER SYNC 

if (nothrd=1) then

sigma <- sqrt(sum(tmp(1:numthrd))) ! SUM ..., sqrt ... [rest of comment illegible in source]
diag(iptr2) <- u(iptr2, i)
sdiag(iptr2) <- sigma

u(nbase+i+1, i) <- u(nbase+i+1, i)+sign(u(nbase+i+1, i))*sigma
alpha <- 1.0/(sigma*u(nbase+i+1, i))
u(iptr2, i)=alpha
endif 

BARRIER SYNC 
iptr3=iptr2+1
v(is:ie, i)

<- a(iptr3:n, iptr2+is:iptr2+ie)^t*u(iptr3:n, i)
BARRIER SYNC

len2 <- (i-1+numthrd-1)/numthrd
isx <- (nothrd-1)*len2+1
iex <- min(i-1, nothrd*len2)

u(n+1, isx:iex) <- u(nbase+i+1:n, isx:iex)^t*u(i+1:n, i)
v(n+1, isx:iex) <- v(nbase+i+1:n, isx:iex)^t*u(i+1:n, i)
BARRIER SYNC 

v(is:ie, i) <- alpha*(v(is:ie, i)-v(is:ie, 1:i-1)*u(n+1, 1:i-1)^t

-u(is:ie, 1:i-1)*v(n+1, 1:i-1)^t)
BARRIER SYNC

tmp(nothrd) <- v(is:ie, i)^t*u(is:ie, i)

BARRIER SYNC 

if (nothrd=1) then

beta <- 0.5*alpha*sum(tmp(1:numthrd))
endif 

BARRIER SYNC 

v(is:ie, i) <- v(is:ie, i)-beta*u(is:ie, i)
BARRIER SYNC
if (i<iblk) then
if (iptr2<n-2) then

u(is:ie, i+1) <- u(is:ie, i+1)-u(is:ie, istart:i)*v(i+1, istart:i)^t

-v(is:ie, istart:i)*u(i+1, istart:i)^t

else

u(is:ie, i+1:i+2) <- u(is:ie, i+1:i+2)-u(is:ie, istart:i)*v(n-1:n, istart:i)^t

-v(is:ie, istart:i)*u(n-1:n, istart:i)^t

return 
endif 
endif 
enddo 

eliminate threads 

return 
end 
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3 nbase v ^ (D^L^n^T^y^V K nwidth p ? r^fe 

subroutine update(a, k, n, nbase, nwidth, u, v, nothrd, numthrd)
shared array a(k, n), u(n+1,*), v(n+1,*)

BARRIER SYNC 

blk <- nwidth

nbase2 <- nbase+nwidth

len <- (n-nbase+2*numthrd-1)/(2*numthrd)

isl <- nbase+(nothrd-1)*len+1

iel <- min(n, nbase+nothrd*len)

nbase3 <- nbase+2*numthrd*len

isr <- nbase3-nothrd*len+1

ier <- min(n, isr+len-1)

a(iel+1:n, isl:iel)

<- a(iel+1:n, isl:iel)-w(iel+1:n, 1:blk)*u(isl:iel, 1:blk)^t

-u(iel+1:n, 1:blk)*w(isl:iel, 1:blk)^t
a(ier+1:n, isr:ier)

<- a(ier+1:n, isr:ier)-w(ier+1:n, 1:blk)*u(isr:ier, 1:blk)^t
-u(ier+1:n, 1:blk)*w(isr:ier, 1:blk)^t

call trupdate(a, k, n, isl, iel, u, v, blk)
call trupdate(a, k, n, isr, ier, u, v, blk)
BARRIER SYNC 

return 

end 



subroutine trupdate( a ,k,n, is, ie,u,v,blk) 
constant blk2<-3tfft :/ n y ^f[5&rffl:/P y#t& 
shared array a(k, n), u(n+1,*), v(n+1,*)

do i=is, ie, blk2

ie2 <- min(i+blk2-1, ie)
a(is2:ie, is2:ie2)

<- a(is2:ie, is2:ie2)-w(is2:ie, 1:blk)*u(is2:ie2, 1:blk)^t
-u(is2:ie, 1:blk)*w(is2:ie2, 1:blk)^t

enddo 

return 
end 

subroutine copy(a, k, n, nbase, nothrd, numthrd)

len <- (n-nbase+2*numthrd-1)/(2*numthrd)

isl <- nbase+(nothrd-1)*len+1

lenl=max(0, min(n-isl+1, len))

nbase3 <- nbase+2*numthrd*len

isr <- nbase3-nothrd*len+1

lenr=max(0, min(n-isr+1, len))

call bandcp(a, k, n, isl, len)
call bandcp(a, k, n, isr, ier)

return 
end 
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subroutine bandcp ( a , k. , n , is , len ) 
constant nb <- size of small buffer
private w(nb, nb)

nn <- min(nb, len)
loopx <- (len+nn-1)/nn

do j=1, loopx
ip <- is+(j-1)*nn
nl <- len-(j-1)*nn
nnx <- min(nn, nl)
len2 <- n-ip+1

loopy <- (len2+nnx-1)/nnx
is2=is+(j-1)*nnx

TRL(w(1:nnx, 1:nnx)) <- TRL(a(is2:is2+nnx-1, is2:is2+nnx))
TRU(a(is2:is2+nnx-1, is:is+nnx)) <- TRL(w(1:nnx, 1:nnx))^t

do i=2, loopy-1
is3 <- is2+(i-1)*nnx

w(1:nnx, 1:nnx) <- a(is3:is3+nnx-1, is2:is2+nnx)
a(is2:is2+nnx, is3:is3+nnx-1) <- w(1:nnx, 1:nnx)^t
enddo

is3 <- is2+(loopy-1)*nnx
ny <- n-is3+1

w(1:ny, 1:nx) <- a(is3:n, is2:is2+nnx)
a(is2:is2+nnx, is3:n) <- w(1:ny, 1:nx)^t

enddo 

return 
end 
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c 3®*tAtf5Ucr>H#-<^ h/^ (ev(l:n,l:nev) \Z.fatfj£tlX^ Sr7C<OfT5lJcD@^ 

c 

subroutine convev(a, k, n, ev, nev)
shared array a(k, n), ev(k, n)

c create threads

c set nothrd and numthrd

c nothrd (t^-^ V V K fc £>#-^T 1 — # TH^ numthrd=#TH 1/ S> K 

BARRIER SYNC 

len <- (nev+numthrd-1)/numthrd
is <- (nothrd-1)*len+1
ie <- min(nev, nothrd*len)
nevthrd <- max(ie-is+1, 0)

call convevthrd(a, k, n, ev(1, is), nevthrd)
BARRIER SYNC 

return 
end 
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subroutine convevthrd ( a , k , n , ev , i width ) 
constat blk«~:/ci 
shared array a(k,n) 
array ev(k,*) 

private w(blk, n), w2(blk, blk)

if (iwidth<0) then

return 

endif 

numblk=(n-2+blk-1)/blk
nfbs <- n-2-blk*(numblk-1)
do i=n-2, n-2-nfbs+1, -1

alpha <- a(i, i) ! alpha [rest of comment illegible in source]
x(1:iwidth) <- ev(i+1:n, 1:iwidth)^t*a(i+1:n, i)
ev(i+1:n, 1:iwidth) <-

ev(i+1:n, 1:iwidth)+alpha*a(i+1:n, i)*x(1:iwidth)^t
enddo 

do i=1, numblk-1
is <- n-2-(nfbs+i*blk)+1
ie <- is+blk-1
w(1:blk, 1:iwidth)

<- a(is+1:n, is:ie)^t*ev(is+1:n, 1:iwidth)
w(1:blk-1, 1:iwidth) <- w(1:blk-1, 1:iwidth)

+TRL(a(ie+1:is, is:ie))^t*ev(ie+1:is, 1:iwidth)

DIAG(w2) <- DIAG(a(is:ie, is:ie)) ! DIAG [rest of comment illegible in source]
do i2=blk, 1, -1
do i1=i2-1, 1, -1
w2(i1, i2)

<- w2(i1, i1)*(a(is+i2:n, is+i2-1)^t*a(is+i2:n, is+i1-1))

enddo 

enddo 

do i1=blk-1, 1, -1
do i2=blk, i1+1, -1

w2(i1, i2) <- w2(i1, i2)+w2(i1, i1+1:i2-1)*w2(i1+1:i2-1, i2)

enddo 

enddo 

do i2=blk, 1, -1
do i1=i2-1, 1, -1

w2(i1, i2) <- w2(i1, i2)*w2(i2, i2)

enddo 

enddo 

w(1:blk, 1:iwidth) <-

w(1:blk, 1:iwidth)+TRU(w2)*w(1:blk, 1:iwidth) ! TRU [rest of comment illegible in source]
ev(is+1:n, 1:iwidth)

<- a(is+1:n, is:ie)*w(1:blk, 1:iwidth)
ev(ie+1:is, 1:iwidth)

<- TRL(a(ie+1:is, is:ie-1))*w(1:blk-1, 1:iwidth)
enddo 

return 
end 
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